# Load the US states layer from the Geodataframes folder and inspect it.
# (One import per line per PEP 8; the original packed both onto one line.)
import os

import geopandas as gpd

states = gpd.read_file(os.path.join("Geodataframes", "USA_States.shp"))
type(states)                        # confirm a GeoDataFrame was returned
states.shape                        # (rows, columns)
states.columns
states.head()
states[states.isna().any(axis=1)]   # rows with any missing value (expect none)
states.info()
RangeIndex: 51 entries, 0 to 50
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 STATE_NAME 51 non-null object
1 STATE_FIPS 51 non-null object
2 STATE_ABBR 51 non-null object
3 geometry 51 non-null geometry
dtypes: geometry(1), object(3)
memory usage: 1.7+ KB
# Quick default-style preview of the states layer.
states.plot()
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Load the remaining layers (capitals, hydrography/rivers, lakes) and preview two of them.
capitals=gpd.read_file(os.path.join("Geodataframes","USA_Capitals.shp"))
rivers=gpd.read_file(os.path.join("Geodataframes","USA_Hydrography.shp"))
lakes=gpd.read_file(os.path.join("Geodataframes","USA_Lakes.shp"))
capitals.plot()
rivers.plot()
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Quick default-style preview of the lakes layer.
lakes.plot()
# Style the layers the way we want: choose fill color, outline color, and line width.
states.plot(facecolor="#EEF1F5",# fill color
edgecolor='black', # outline color
linewidth=0.2) # line width
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
capitals.plot(marker='.', # marker shape
color='red',
markersize=4,
alpha=0.3) # transparency
rivers.plot(edgecolor='#38B5C6',
linewidth=0.5)
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Verify that all layers share the same CRS.
lakes.crs
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
# Same CRS check for the capitals layer.
capitals.crs
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
# Same CRS check for the states layer.
states.crs
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
rivers.crs #all layers share the same CRS
#so no to_crs() reprojection is needed
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
# Build the complete map with every layer and customize it:
# states form the base axes; rivers, lakes, and capitals are drawn on top
# (plot order determines z-order, so keep these calls in this sequence).
base = states.plot(facecolor="#ECF0F1", edgecolor='#2A2A2A', linewidth=0.1,figsize=(12,12))
rivers.plot(edgecolor='#2399B4', linewidth=0.4,
ax=base)
lakes.plot(edgecolor="#2399B4", facecolor="#A9E8F7", linewidth=0.3,
ax=base)
capitals.plot(marker='.', color='red', markersize=2,alpha=0.7,
ax=base)
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# If we want a map where the layers can be explored interactively:
# each .explore() call adds a named layer onto the same folium map `m`.
import folium
m = states.explore(color="#DDCDFC",
name="states")
m = rivers.explore(m=m, color="#2399B4",
name="rivers")
m = lakes.explore(m=m, color="#1075F5",
name="lakes")
m = capitals.explore(m=m, color="red",
name="capitals")
# Add a control so we can toggle layers on and off.
folium.LayerControl().add_to(m)
m
# Select the state of Montana to bound the map,
# then clip each layer to Montana's boundary.
montana = states[states.STATE_NAME == 'Montana']

states_clipped = gpd.clip(states, mask=montana)
rivers_clipped = gpd.clip(rivers, mask=montana)
lakes_clipped = gpd.clip(lakes, mask=montana)
capitals_clipped = gpd.clip(capitals, mask=montana)
# Once the clipping is done, visualize and style the Montana map.
base = montana.plot(facecolor="#FBB216", edgecolor='#5C45A0', linewidth=0.4,figsize=(6,6))
capitals_clipped.plot(marker='+', color='red', markersize=18,ax=base)
rivers_clipped.plot(edgecolor='blue', linewidth=0.5,ax=base)
lakes_clipped.plot(edgecolor='blue',facecolor="#1075F5", linewidth=0.5,ax=base)
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Coordinates of Montana (its capital, Helena) as [lat, lon], the order folium expects.
montanaCoord=[46.59271, -112.03611]
# Zoom in on Montana in the interactive map.
m = states.explore(location=montanaCoord,
zoom_start=5.5,
tiles='CartoDB positron',
color='#DDCDFC',
name="states")
m = rivers.explore(m=m, color="#2399B4",
name="rivers")
m = lakes.explore(m=m, color="#1075F5",
name="lakes")
m = capitals.explore(m=m, color="red",
name="capitals")
folium.LayerControl().add_to(m)
m
# Save the static (non-interactive) Montana map.
# The original bound each overlay's return value to map2/map3/mapEnd, but those
# names were never used — the plots all draw onto `base`, so drop the bindings.
import matplotlib.pyplot as plt
base = montana.plot(facecolor="#FBB216", edgecolor='#5C45A0', linewidth=0.4,figsize=(5,5))
rivers_clipped.plot(edgecolor='blue', linewidth=0.5,ax=base)
lakes_clipped.plot(edgecolor='blue',facecolor="#1075F5", linewidth=0.5,ax=base)
capitals_clipped.plot(marker='+', color='red', markersize=15,ax=base)
plt.savefig(os.path.join("Maps",'mapEnd.jpg'))
# The file is saved into the Maps folder in Datalore.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Finally, export the layers into a single GeoPackage file (one layer each,
# written in the same order as before: states, rivers, lakes, capitals).
gpkg_path = os.path.join("Maps", "worldMaps.gpkg")
layer_pairs = [("states", states), ("rivers", rivers),
               ("lakes", lakes), ("capitals", capitals)]
for layer_name, gdf in layer_pairs:
    gdf.to_file(gpkg_path, layer=layer_name, driver="GPKG")
# Record the GitHub link to the published GeoPackage.
worldMaps="https://github.com/ThayraSosa/introgdf/raw/main/Maps/worldMaps.gpkg"
from fiona import listlayers
listlayers(worldMaps)
# Reload each layer straight from the remote GeoPackage.
rivers=gpd.read_file(worldMaps,layer='rivers')
lakes=gpd.read_file(worldMaps,layer='lakes')
capitals=gpd.read_file(worldMaps,layer='capitals')
states=gpd.read_file(worldMaps,layer='states')
# Quick composite plot to confirm the round trip worked.
base = states.plot(facecolor='gainsboro')
capitals.plot(ax=base, markersize=0.5, color='red')
lakes.plot(ax=base, linewidth=0.5)
rivers.plot(ax=base, linewidth=0.5)
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
Working with data
Video tutorial
Plug in multiple data sources
In Datalore you can work with various data sources together in one notebook.
You can connect your SQL databases (such as MySQL, Snowflake, PostgreSQL, Redshift, etc.), bucket storages (AWS S3, GCS buckets), and files (any file types) from the interface and further query and join them in one notebook.
To manage all your data, please use the Attached data tab in the left-hand sidebar.

Reusing data connections
When creating a bucket or a database connection, you’ll be able to reuse it across other notebooks in the same workspace.
You can manage all of the attached data sources from the workspace file system and add new connections directly from the notebook interface.
When you share a notebook or a workspace, your credentials are not exposed to the environment.

Persistent file storage
Datalore comes with persistent storage for each notebook. This means you can upload your data files directly to Notebook files. If you want to share a file across multiple notebooks, attach Workspace files to the notebook and upload it there. Workspace files are mounted under the /data/workspace_files/ directory.
Benefits of attaching files to a notebook
- Your workspace file system won't be cluttered with too many files.
- Notebook files are shared automatically when you invite collaborators to the notebook.
Additionally, you can download data to Notebook or Workspace files or to store it in memory using various Python packages and APIs.
Dataframe exploration
Whenever a pandas dataframe is the result of your cell execution, you get additional tabs in the cell output:
- Table – a scrollable table view of your data
- Raw – this tab represents the raw output without the ability to scroll the data
- Visualize – this tab brings out-of-the box plots to help you visually explore the data
- Statistics – this tab provides essential descriptive statistics for your dataframe
Task: Run the code cell above and navigate to the Visualize and Statistics tabs!

# Download the sample CSV and load it into a pandas DataFrame.
# NOTE: plain `import urllib` does NOT guarantee the `urllib.request`
# submodule is loaded — import it explicitly.
import urllib.request
import pandas as pd
urllib.request.urlretrieve('https://datalore-samples.s3.eu-west-1.amazonaws.com/datalore_gallery_of_samples/Getting+started/gpus.csv', 'gpus.csv')
data = pd.read_csv("gpus.csv")
data
Viewing and editing attached files
Double-clicking on a .csv or text file opens it for editing in the right sidebar editor and lets you view the file and edit its contents.

If you open a .py script, you will also get smart coding assistance features for editing its contents.

Managing the environment
Video tutorial
Preconfigured environment
Each notebook in Datalore has an isolated environment. This means that when you apply changes to one notebook, they won't affect any of the other notebook environments.
Datalore comes with a lot of Python libraries pre-installed.
We've already installed pandas, NumPy, sklearn, MatplotLib, and Seaborn, so you can start importing the package you need right away.
Datalore supports both the pip and Conda package managers. Pip is chosen by default, but you can always switch to Conda.

Installing additional libraries
- To install additional libraries, upgrade package versions, and remove libraries, go to Environment | Explore tab. The changes will be written into the .yaml file, which will be stored in your Notebook files.
- To install a package from a Git repository, go to Environment | Repositories.
- To install any other dependencies (f.e. non-python dependencies), you can modify the
init.sh file. It will run before the environment is installed.
💡 Packages installed via the Environment tab are persisted when you reopen notebooks. You can also install packages using pip magic commands or Terminal, but they won't be persisted.
You can learn more about this in the Environment manager documentation.
Task: Install and import the datasets library
- Run the code cell below using Shift+Enter.
- Click on the prompt in the error log to search for the datasets library. This prompt will open the Explore tab of the Environment manager.
- Click on the datasets library.
- Click the Install button.
- Restart the kernel.
- Rerun the cell.
# Requires the third-party 'datasets' library — install it via the
# Environment manager first (see the task steps above), then rerun.
import datasets
datasets.__version__
Collaborating with your team
Video tutorial
Sharing a notebook
In Datalore, you can edit notebooks together with your team in real time.
Click on the Share button in the top-right corner and choose your preferred sharing method:
- Share by sending a link (the simpler option).
- Invite collaborators by email (for more granular permissions).
- Share with groups of collaborators. Please contact your Datalore admins to find out whether you have groups integrated with Datalore.
💡 To access notebooks as collaborators, invited users will need to create a Datalore account.

When sharing a notebook you can provide either edit or view access.
-
Edit access will allow collaborators to edit code and attached files and run computations.
⚠️ Note that collaborators will consume the notebook owner's resources.
-
View access will only allow users to see the real-time representation of the notebook.
To track the changes, Datalore has a built-in version control system where you can create history checkpoints and revert the notebook to past versions. Check this out via Tools | History.
You can read more about notebook sharing here.
Task: Invite your colleagues to Datalore!
Try out the real-time collaboration feature with your colleagues. Working together can be a lot of fun. 🚀
To track your collaborator's actions through the notebook and attached files, click on their avatar in the upper right-hand corner and start following along!

Sharing a workspace
In Datalore, you can create and share workspaces.
Workspaces help you organize your work and allow you to easily share multiple notebooks, data connections, files, and reports with your team.

⚠️ Note that the workspace owner's resources will be consumed for all the computations made in the workspace.
Reporting
Video tutorial
Report builder
To share your research results with stakeholders, you can use the Report builder feature via the Tools menu section or by clicking the Build report button in the upper right-hand corner.
You will be able to:
- Arrange the cells on a canvas to make the report look more dashboard-style.
- Hide specific cell inputs and outputs.
- Publish a static or interactive report.

Task: Create a report out of this notebook!
Sharing reports
After you publish a report, it will become available under a link. You can then share it with colleagues even if they don't have a Datalore account – the report will be available for them inside the browser. Each report viewer will get a separate copy of the report and will be able to interact with the controls and rerun the report independently.
You can also access all the workspace reports from the Published reports section in your Workspace file system.
Export as PDF, PY, IPYNB
You can export notebooks in multiple formats, including PDF.
Go to the File menu tab and select the export option you need.
Automations
Video tutorial
Scheduled runs
In Datalore, you can schedule your notebook to run on a regular basis. Go to the Computation tab and create a schedule in the Scheduled runs widget. You'll be able to configure the running interval by using the dropdowns or by specifying a cron string.

Scheduled report updates
When configuring notebook schedules, you can choose to automatically update published reports, delivering regular updates to your stakeholders.
Managing the schedules
You can view and edit all the schedules of the workspace from the file system. You'll be able to view your run results and change the scheduling settings.
Switching between CPU and GPU
When running a notebook, you can choose between available machines according to your needs. The Computation tab will also show you the CPU and RAM load statistics. The computation status bar is located in the bottom right-hand corner of the editor.

Background computation
Switching on Background computation from the Computation tab allows you to keep notebooks running even if you close the browser tab.
Learn more about Background computation here.
⚠️ Be careful when switching on Background computation, as it will consume your computation quota.
Shortcuts 101
Datalore supports a wide variety of Jupyter and PyCharm shortcuts. You can access the full list from Help | Shortcuts or by pressing Shift+F1.

Command mode and Editor mode
There are two modes for shortcuts: Command mode and Editor mode.
When editing a cell's content you are in Editor mode. To access Command mode, which allows you to manipulate the cells themselves, press Esc. To switch back to Editor mode, press Enter.
Some of the most often used shortcuts include:
- Run the selected cell and select below: Shift+Enter.
- Change cell type: Command+M / Ctrl+M.
- Undo action: Command+Z / Ctrl+Z.
- Delete cell: DD (Command mode).
- Insert cell above: A (Command mode).
- Insert cell below: B (Command mode).
- Copy selected cells: C (Command mode).
- Cut selected cells: X (Command mode).
- Paste below: V (Command mode).
Command palette
Access quick actions using the Command palette from the Help menu tab.

Tutorial Gallery, Updates, Contact info
We regularly post tips & tricks on our blog and on Twitter! Check them out and subscribe to get the most out of Datalore.
Basic tutorials
- Pandas Tutorial: 10 Most Popular Questions for Python Data Frames
- Visualization Tutorial With Seaborn
- Visualization With Pyplot in Datalore
- Interactive Controls Tutorial
- Lets-Plot Usage Guide
- Exploratory Data Analysis in Practice
Cool notebook samples
- 10,000,000 Jupyter Notebooks Analyzed
- Random Forest, Trees, and Stumps 🌳🌴🌲 A General Overview of Binary Classification Models
Webinars
- Is Your Analysis Reproducible? 5 Ways to Make Your Work Bulletproof With Datalore
- 5 Tips for Combining Python and SQL in Datalore
Video Tutorials
How to get support
What really helps us make the Datalore product better is your feedback. Our team is always thankful when you tell us about your experience or report bugs.
Feel free to share your feedback with us and report any issues by:
- Writing a post on our public forum.
- Emailing us at datalore-enterprise@jetbrains.com.